
clear all

// Compute county-level Representation Indices using county-district-year data,
// then collapse to the county-year level.
// (Plain "//" is used for this header: "///" is Stata's line-join marker and
// would splice the following line into the comment.)

cd "$OutputPath/"
use county_sldu_pop_elec, clear   // a county-sldu dataset

// Discard county-district rows whose population is exactly zero.
// Rows with a missing population are retained.
keep if pop != 0

// Park the filtered data in a tempfile so later steps can merge against it.
tempfile base
save `base', replace

// Attach the county-level education measure (ACS 5 year estimates from NHGIS)
// to the county-district rows held in `base'.
cd "$OutputPath/"
use "nhgis_county", clear

// County-level ratio of bachelors to population.
generate pbachelors = bachelors / population
keep year fstate fcounty pbachelors

// Each NHGIS county-year row can match many county-district rows.
// keep(using match) discards NHGIS county-years that have no counterpart in
// the base data; base rows without an NHGIS match are retained.
merge 1:m year fstate fcounty using `base', keep(using match) nogenerate

// Overwrite the tempfile with the education-augmented data.
save `base', replace

// District-level and county-level characteristics.
// Variables are referenced by their full names throughout so the script does
// not depend on Stata's variable-abbreviation setting.

rename population pop
gen edu = pbachelors*pop   // county bachelors share applied to this row's population

// d_* : total over all rows of the same district (year, fstate, sldu)
// c_* : total over all rows of the same county   (year, fstate, fcounty)
// The "missing" option leaves the total missing (not 0) when every input is missing.
foreach v in pop vap cvap edu {
	egen d_`v' = total(`v'), by(year fstate sldu) missing
	egen c_`v' = total(`v'), by(year fstate fcounty) missing
}

// Stats to measure counties vs. districts
gen pdistrictinc_u = vap/d_vap   // this row's share of its district's vap
gen pcountyind_u   = vap/c_vap   // this row's share of its county's vap

gen ndistricts_u = 1   // to be summed in collapse

// Members from districts lying entirely inside this county (row holds all of the district's vap).
gen nwholerep_u = 0
replace nwholerep_u = nmember if pdistrictinc_u==1   // to be summed in collapse

// Members from districts shared with at least one other county.
gen nsharedrep_u = 0
replace nsharedrep_u = nmember if pdistrictinc_u<1 & pdistrictinc_u>0   // to be summed in collapse

// Count counties in each district, then subtract the county itself to get its neighbors.
// Grouped by fstate (not the state name) to match every other district-level statistic above.
egen neighbors_u = count(fcounty), by(year fstate sldu)
replace neighbors_u = neighbors_u-1   // to be summed in collapse


// Assign election-year turnout to fiscal year.
// For each party suffix (d, r) the g<YYYY>_sts_<party>v column of the chosen
// election year is copied into turnout_<party>. Each fiscal year has a default
// election year plus state-specific overrides.
foreach p in d r {
	// fiscal year 2007: 2004 election; 2003 for Louisiana / New Jersey; 2002 for Texas
	gen turnout_`p' = g2004_sts_`p'v if year==2007
	replace turnout_`p' = g2003_sts_`p'v if year==2007 & (state=="Louisiana" | state=="New Jersey")
	replace turnout_`p' = g2002_sts_`p'v if year==2007 & (state=="Texas")

	// fiscal year 2012: 2010 election; 2007 for Louisiana / New Jersey
	replace turnout_`p' = g2010_sts_`p'v if year==2012
	replace turnout_`p' = g2007_sts_`p'v if year==2012 & (state=="Louisiana" | state=="New Jersey")

	// fiscal year 2017: 2014 election; 2012 for Texas
	replace turnout_`p' = g2014_sts_`p'v if year==2017
	replace turnout_`p' = g2012_sts_`p'v if year==2017 & (state=="Texas")
}

// Two-party turnout and vote shares for each county-district row
gen turnout = turnout_d+turnout_r

gen dvoteshare_u = turnout_d/turnout
gen rvoteshare_u = turnout_r/turnout

// County-level totals and D two-party vote share
egen c_turnout   = total(turnout),   by(fstate fcounty year) missing
egen c_turnout_d = total(turnout_d), by(fstate fcounty year) missing
gen  c_dvoteshare = c_turnout_d/c_turnout

// District-level totals and D two-party vote share
egen d_turnout_d = total(turnout_d), by(fstate sldu year) missing
egen d_turnout_r = total(turnout_r), by(fstate sldu year) missing
gen  d_turnout = d_turnout_d+d_turnout_r

gen d_dvoteshare = d_turnout_d/d_turnout

// State-level totals
egen s_turnout_d = total(turnout_d), by(year fstate) missing
egen s_turnout_r = total(turnout_r), by(year fstate) missing
gen s_turnout = s_turnout_d+s_turnout_r


// County-level std. dev. in D two-party vote share over study period
egen dvotesd = sd(c_dvoteshare), by(fstate fcounty)
egen d_dvotesd = total(dvotesd), by(year fstate sldu) missing

// District total of cvap weighted by the county vote-share s.d.
// The by() group was missing here, which made this a single grand total over
// the whole dataset; it now follows the same district grouping as d_dvotesd.
egen d_cvap_swing = total(cvap*dvotesd), by(year fstate sldu) missing
// Compute district-level election outcomes: uncontested, winner, decisiveness, swingness, and closeness

// Uncontested elections have 0 turnout in some states or missing in other states.
// The flag is 1 when either party's district total is exactly 0, and is left
// missing when the state-year turnout total is missing.
// NOTE(review): a district total that is missing (rather than 0) yields 0 here,
// i.e. "contested" -- confirm that is intended for the states that code
// uncontested races as missing.
gen uncontested = (d_turnout_d==0 | d_turnout_r==0) if s_turnout != .

// Democrat wins, winning votes. dwins is missing when the D district total is missing.
// NOTE(review): when only the R total is missing, the comparison is false
// (Stata treats missing as larger than any number) and dwins is 0 -- confirm.
gen dwins = (d_turnout_d > d_turnout_r) if d_turnout_d != .

// Votes in this row for whichever party carried the district, and their district total.
gen winning = dwins*turnout_d + (1-dwins)*turnout_r
egen d_winning = total(winning), by(year fstate sldu) missing


// Need to compute state-level sigma before summing across districts:
// slduflag marks one row per district-year so each district enters the sum once.
egen slduflag = tag(fstate sldu year)
egen sigma = total(slduflag*(d_dvoteshare*(1-d_dvoteshare))), by(fstate year) missing

// Swing measure: 1 / (1 + |D - R| / turnout), per row and summed within the district.
gen swing = 1/(1+abs(turnout_d-turnout_r)/turnout)
egen d_swing = total(1/(1+abs(turnout_d-turnout_r)/turnout)), by(year fstate sldu) missing

// Close race indicator: two-party margin below 30 percent of turnout; missing when turnout is missing.
gen close_30_u = (abs((turnout_d-turnout_r)/turnout) < 0.3) if turnout != .


// Measures of psi and total share of votes (TSV), both to be summed in collapse
foreach popvar in cvap {
	// psi: members times the share of the district total lying outside this row
	gen psi_`popvar'_u = nmember*(1-(`popvar'/d_`popvar'))
	// some values are -1e-7 due to stata's precision, replace with zero
	replace psi_`popvar'_u = 0 if psi_`popvar'_u < 0

	// TSV: members times this row's share of the district total
	gen TSV_`popvar'_u = nmember*`popvar'/d_`popvar'
//	gen TSV_`popvar'_swing_u=nmember*(`popvar'*dvotesd)/(d_`popvar'*d_dvotesd)
}

// Collapse to district, extract legislature statistics.
// nmember gets the _u suffix first, and the full county-district data is saved
// to the tempfile so it can be merged back after the district collapse.
rename nmember nmember_u

save `base', replace

// Collapse to one row per district-year to derive chamber-level statistics.
// nmember_u, ndistricts_u and state are deliberately not collapsed here: they
// are not used at the district level in this script, and after the merge below
// they come back from `base' at the county-district level.
collapse (mean) uncontested demwon repwon indwon totaldistricts (sum) pop, by(year fstate sldu)

// Seats won by each party across the state's districts, and a Democratic-majority flag.
egen demsinhouse_u = total(demwon), by(year fstate) missing
egen repsinhouse_u = total(repwon), by(year fstate) missing
gen demmajority_u = (demsinhouse_u > repsinhouse_u)
replace demmajority_u = . if demsinhouse_u==. | repsinhouse_u==.

rename pop district_population_u

// Suffix the district-level results so they do not collide with the
// same-named county-district variables stored in `base'.
rename (uncontested demwon repwon indwon totaldistricts) (uncontested_u demwon_u repwon_u indwon_u totaldistricts_u)

// Bring the district-level results back onto the county-district rows.
merge 1:m year fstate sldu using `base', nogenerate

// Collapse to county level

local meanvars totaldistricts_u demmajority_u
local sumvars TSV_* psi* ndistricts_u nwholerep_u nsharedrep_u neighbors_u nmember_u

// missing if uncontested, don't drop from sample by setting to missing
local sumvars_nomiss close_30_u uncontested_u demwon_u repwon_u indwon_u

// collapse (sum) returns 0 for a group whose inputs are all missing. Track a
// per-row missing flag for each variable; its county mean equals 1 only when
// every row in the county was missing.
foreach j of varlist `meanvars' `sumvars' {
	gen allmiss_`j' = (`j'==.)   // take mean in collapse
}

collapse (sum) `sumvars' `sumvars_nomiss' (mean) `meanvars' allmiss* (first) state, by(year fstate fcounty)

// Restore missing for county-years in which a variable was missing on every row.
foreach j of varlist `meanvars' `sumvars' {
	replace `j' = . if allmiss_`j'==1
}

drop allmiss*

// Any close race among the county's districts.
// close_30_u is a collapse (sum) result and therefore never missing at this
// point, so the replace below changes nothing; it is kept as a safeguard.
gen anyclose_30_u = close_30_u>0
replace anyclose_30_u = . if close_30_u==.
drop close_30_u


cd "$OutputPath/"
save county_pop_elec_upper, replace

